import numpy as np
import pandas as pd
import glob
# Directory that holds the exported CSV tables.
path = '/home/yurli/Molnlycke/case2/csv'
all_files = glob.glob(path + "/*.csv")
import os

# Read every CSV into `df`, keyed by the file name without its extension.
# `df_names` keeps the table names in the order the files were read.
df_names = []
df = {}
for csv_path in all_files:
    table_name, _extension = os.path.splitext(os.path.basename(csv_path))
    table = pd.read_csv(csv_path, index_col=None, header=0)
    df[table_name] = table
    df_names.append(table_name)
    # Report each table's (rows, columns) as it is loaded.
    print(table_name, table.shape)
medications (42989, 13) providers (5855, 12) payer_transitions (3801, 5) imaging_studies (855, 10) supplies (0, 6) payers (10, 21) allergies (597, 6) procedures (34981, 8) organizations (1119, 11) conditions (8376, 6) careplans (3483, 9) encounters (53346, 15) devices (78, 7) immunizations (15478, 6) patients (1171, 25) observations (299697, 8)
# len(set(df['patients']['Id'])) == df['patients'].shape[0]
# Patient whose record is printed and plotted further down.
patientID = '76982e06-f8b8-4509-9ca3-65a99c8650fe'

# Display name: first and last name joined by a single space.
df['patients']['Name'] = df['patients']['FIRST'] + ' ' + df['patients']['LAST']


def _format_zip(raw_zip):
    """Return `raw_zip` as a zero-padded 5-digit string, or '' if it is not a number."""
    try:
        # Converting through int removes any fractional part (e.g. 2118.0), but
        # a numerically parsed ZIP has also lost its leading zeros
        # (02118 -> 2118), so pad back to five digits.
        return str(int(raw_zip)).zfill(5)
    except (TypeError, ValueError, OverflowError):
        # Missing (NaN/None), infinite or non-numeric values become blank.
        return ''


zipCode = [_format_zip(raw_zip) for raw_zip in df['patients']['ZIP']]
df['patients']['ZIP'] = zipCode

# Single-line postal address: "<street>, <zip> <city>, <state>, <county>".
df['patients']['Address'] = (
    df['patients']['ADDRESS'] + ', '
    + df['patients']['ZIP'] + ' '
    + df['patients']['CITY'] + ', '
    + df['patients']['STATE'] + ', '
    + df['patients']['COUNTY']
)

# Patients without a recorded marital status are labelled 'Unknown'.
df['patients']['MARITAL'] = df['patients']['MARITAL'].fillna('Unknown')
import dateutil.parser
# Birth year per patient Id, computed once for the whole patients table
# instead of filtering that table again for every encounter row.
# Only the first row is used if an Id appears more than once.
_unique_patients = df['patients'].drop_duplicates(subset='Id', keep='first')
_birth_year_by_patient = dict(zip(
    _unique_patients['Id'],
    pd.DatetimeIndex(_unique_patients['BIRTHDATE']).year,
))

# Encounters whose PATIENT has no row in the patients table get NaN here
# (and therefore a NaN age) rather than aborting the whole computation.
df['encounters']['birthYear'] = df['encounters']['PATIENT'].map(_birth_year_by_patient)
df['encounters']['encounterYear'] = pd.DatetimeIndex(df['encounters']['START']).year

# Age is the difference of calendar years, so it can be one higher than the
# completed-years age when the encounter falls before that year's birthday.
df['encounters']['age'] = df['encounters']['encounterYear'] - df['encounters']['birthYear']

# Reduce the encounter timestamps to plain YYYY-MM-DD strings for display.
for _date_col in ('START', 'STOP'):
    df['encounters'][_date_col] = [
        dateutil.parser.parse(stamp).strftime("%Y-%m-%d")
        for stamp in df['encounters'][_date_col]
    ]
# Work on the observations table through a short alias (same object as
# df['observations'], so the column assignments below modify it in place).
obs_table = df['observations']

# Observations without a unit get an empty string so the text below can be built.
obs_table['UNITS'] = obs_table['UNITS'].fillna('')

# Normalise the timestamp to "YYYY-MM-DD HH:MM:SS".
obs_table['DATE'] = [
    dateutil.parser.parse(stamp).strftime("%Y-%m-%d %H:%M:%S")
    for stamp in obs_table['DATE']
]

# One printable line per observation: "<date>: <description>: <value> <units>".
obs_table['observations'] = (
    obs_table['DATE'] + ': '
    + obs_table['DESCRIPTION'] + ': '
    + obs_table['VALUE'] + ' '
    + obs_table['UNITS']
)
def _parse_date_or_blank(value):
    """Return `value` formatted as YYYY-MM-DD, or '' when it cannot be parsed."""
    try:
        return dateutil.parser.parse(value).strftime("%Y-%m-%d")
    except (TypeError, ValueError, OverflowError):
        # TypeError: not a string (e.g. NaN for a medication with no stop date).
        # ValueError / OverflowError: text the date parser rejects.
        return ''


# Conditions and care plans without an end date show an empty STOP.
df['conditions']['STOP'] = df['conditions']['STOP'].fillna('')

# Medication START must parse; a bad value raises instead of being blanked.
df['medications']['START'] = [
    dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['medications']['START']
]
# Medication STOP may be missing, so unparseable entries become ''.
stop = [_parse_date_or_blank(x) for x in df['medications']['STOP']]
df['medications']['STOP'] = stop

df['careplans']['STOP'] = df['careplans']['STOP'].fillna('')

# Immunization and procedure dates are reduced to YYYY-MM-DD.
df['immunizations']['DATE'] = [
    dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['immunizations']['DATE']
]
df['procedures']['DATE'] = [
    dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['procedures']['DATE']
]
# df['imaging_studies']['DATE'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['imaging_studies']['DATE']]
def care_data_collection(patientID):
    """Collect the rows belonging to one patient from each care table.

    Args:
        patientID: Value matched against the ``PATIENT`` column of each table
            in the module-level ``df`` dictionary.

    Returns:
        A 7-tuple of DataFrames in this order: encounters, observations,
        conditions, medications, careplans, immunizations, procedures.
        Each frame is an independent copy, so callers can add or change
        columns without touching (or getting SettingWithCopyWarning from)
        the shared tables in ``df``. A table with no matching rows yields
        an empty frame.
    """
    table_names = (
        'encounters',
        'observations',
        'conditions',
        'medications',
        'careplans',
        'immunizations',
        'procedures',
    )
    return tuple(
        df[name][df[name]['PATIENT'] == patientID].copy()
        for name in table_names
    )
separator = ", "


def _print_dated_rows(title, rows, start_col, stop_col=None):
    """Print `title` and one line per row of `rows`; print nothing when `rows` is empty."""
    if rows.shape[0] == 0:
        return
    print(title)
    for _, row in rows.iterrows():
        if stop_col is None:
            print(f"{row[start_col]}: {row['DESCRIPTION']}")
        else:
            print(f"{row[start_col]} -- {row[stop_col]}: {row['DESCRIPTION']}")


def patient_Data_printer(patientID):
    """Print a text report for one patient: demographics, allergies, encounters.

    The report starts with the patient's name, race, ethnicity, gender, birth
    date, marital status and address, followed by a comma-separated allergy
    list ('N/A' when the patient has none). Then, for every encounter of the
    patient, it prints the encounter line and every observation, condition,
    medication, care plan, immunization and procedure recorded for that
    encounter; sections with no rows are omitted.

    Args:
        patientID: Value of ``Id`` in the patients table (``PATIENT`` in the
            other tables of the module-level ``df`` dictionary).

    Raises:
        IndexError: If no patient row has this ``Id``.
    """
    patients = df['patients']
    # Look the patient row up once; .iloc[0] raises IndexError when absent.
    patient = patients[patients['Id'] == patientID].iloc[0]

    allergy_rows = df['allergies'][df['allergies']['PATIENT'] == patientID]
    patient_allergies = 'N/A'
    if allergy_rows.shape[0] != 0:
        patient_allergies = separator.join(allergy_rows['DESCRIPTION'])

    # f-strings keep the report printable even when a field is missing (NaN).
    print(patient['Name'])
    print('============================')
    print(f"Race: {patient['RACE']}")
    print(f"Ethnicity: {patient['ETHNICITY']}")
    print(f"Gender: {patient['GENDER']}")
    print(f"Birth Date: {patient['BIRTHDATE']}")
    print(f"Marital Status: {patient['MARITAL']}")
    print(f"Address: {patient['Address']}")
    print('==========================================================================================')
    print('Allergies: ' + patient_allergies)
    print('==========================================================================================')

    (encounter_sub, observation_sub, conditions_sub, medications_sub,
     careplans_sub, immunizations_sub, procedures_sub) = care_data_collection(patientID)

    encounter_rows = zip(
        encounter_sub['Id'],
        encounter_sub['START'],
        encounter_sub['DESCRIPTION'],
        encounter_sub['ENCOUNTERCLASS'],
    )
    for encounter_id, start, description, encounter_class in encounter_rows:
        print('Encounter: ')
        print(f"{start}: {description} (class: {encounter_class})")

        observation_perEncounter = observation_sub[observation_sub['ENCOUNTER'] == encounter_id]
        if observation_perEncounter.shape[0] != 0:
            print('Observations:')
            print(*observation_perEncounter['observations'], sep="\n")

        # Every matching row is printed, not just the first one, so an
        # encounter with several medications/procedures/etc. is shown in full.
        _print_dated_rows(
            'Condition:',
            conditions_sub[conditions_sub['ENCOUNTER'] == encounter_id],
            'START', 'STOP')
        _print_dated_rows(
            'Medications:',
            medications_sub[medications_sub['ENCOUNTER'] == encounter_id],
            'START', 'STOP')
        _print_dated_rows(
            'Care Plans:',
            careplans_sub[careplans_sub['ENCOUNTER'] == encounter_id],
            'START', 'STOP')
        _print_dated_rows(
            'Immunization:',
            immunizations_sub[immunizations_sub['ENCOUNTER'] == encounter_id],
            'DATE')
        _print_dated_rows(
            'Procedure:',
            procedures_sub[procedures_sub['ENCOUNTER'] == encounter_id],
            'DATE')
        print('------------------------------------------------------------------------------------------')


patient_Data_printer(patientID)
Christal240 Brown30
============================
Race: white
Ethnicity: nonhispanic
Gender: F
Birth Date: 1982-09-01
Marital Status: S
Address: 1060 Hansen Overpass Suite 86, 2118 Boston, Massachusetts, Suffolk County
==========================================================================================
Allergies: Latex allergy, Shellfish allergy
==========================================================================================
Encounter:
1982-10-25: Encounter for problem (class: ambulatory)
Medications:
1982-10-25 -- : diphenhydrAMINE Hydrochloride 25 MG Oral Tablet
Care Plans:
1982-10-25 -- : Self-care interventions (procedure)
------------------------------------------------------------------------------------------
Encounter:
2000-06-14: Encounter for problem (class: ambulatory)
Medications:
2000-06-14 -- : ferrous sulfate 325 MG Oral Tablet
------------------------------------------------------------------------------------------
Encounter:
2010-03-27: Consultation for treatment (class: outpatient)
Medications:
2010-03-27 -- 2011-03-22: Etonogestrel 68 MG Drug Implant
------------------------------------------------------------------------------------------
Encounter:
2010-07-07: Encounter for symptom (class: ambulatory)
Observations:
2010-07-07 18:19:08: Body temperature: 37.1 Cel
Condition:
2010-07-07 -- 2010-07-17: Acute viral pharyngitis (disorder)
Procedure:
2010-07-07: Throat culture (procedure)
------------------------------------------------------------------------------------------
Encounter:
2010-11-10: General examination of patient (procedure) (class: wellness)
Observations:
2010-11-10 18:19:08: Body Height: 162.4 cm
2010-11-10 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 2.0 {score}
2010-11-10 18:19:08: Body Weight: 71.2 kg
2010-11-10 18:19:08: Body Mass Index: 27.0 kg/m2
2010-11-10 18:19:08: Diastolic Blood Pressure: 79.0 mm[Hg]
2010-11-10 18:19:08: Systolic Blood Pressure: 117.0 mm[Hg]
2010-11-10 18:19:08: Heart rate: 100.0 /min
2010-11-10 18:19:08: Respiratory rate: 12.0 /min
2010-11-10 18:19:08: Leukocytes [#/volume] in Blood by Automated count: 4.4 10*3/uL
2010-11-10 18:19:08: Erythrocytes [#/volume] in Blood by Automated count: 4.2 10*6/uL
2010-11-10 18:19:08: Hemoglobin [Mass/volume] in Blood: 13.7 g/dL
2010-11-10 18:19:08: Hematocrit [Volume Fraction] of Blood by Automated count: 41.4 %
2010-11-10 18:19:08: MCV [Entitic volume] by Automated count: 91.8 fL
2010-11-10 18:19:08: MCH [Entitic mass] by Automated count: 29.5 pg
2010-11-10 18:19:08: MCHC [Mass/volume] by Automated count: 33.7 g/dL
2010-11-10 18:19:08: Erythrocyte distribution width [Entitic volume] by Automated count: 40.1 fL
2010-11-10 18:19:08: Platelets [#/volume] in Blood by Automated count: 276.0 10*3/uL
2010-11-10 18:19:08: Platelet distribution width [Entitic volume] in Blood by Automated count: 381.6 fL
2010-11-10 18:19:08: Platelet mean volume [Entitic volume] in Blood by Automated count: 10.3 fL
2010-11-10 18:19:08: Tobacco smoking status NHIS: Never smoker
Immunization:
2010-11-10: Influenza seasonal injectable preservative free
------------------------------------------------------------------------------------------
Encounter:
2011-06-08: Patient encounter procedure (class: outpatient)
Procedure:
2011-06-08: Removal of subcutaneous contraceptive
------------------------------------------------------------------------------------------
Encounter:
2011-08-03: Prenatal initial visit (class: ambulatory)
Condition:
2011-08-03 -- 2012-02-29: Normal pregnancy
Care Plans:
2011-08-03 -- 2012-02-29: Routine antenatal care
Procedure:
2011-08-03: Standard pregnancy test
------------------------------------------------------------------------------------------
Encounter:
2011-08-31: Prenatal visit (class: ambulatory)
Procedure:
2011-08-31: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2011-09-28: Prenatal visit (class: ambulatory)
Procedure:
2011-09-28: Fetal anatomy study
------------------------------------------------------------------------------------------
Encounter:
2011-10-26: Prenatal visit (class: ambulatory)
Procedure:
2011-10-26: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2011-11-23: Prenatal visit (class: ambulatory)
Procedure:
2011-11-23: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2011-12-21: Prenatal visit (class: ambulatory)
Procedure:
2011-12-21: Hemoglobin / Hematocrit / Platelet count
------------------------------------------------------------------------------------------
Encounter:
2012-01-18: Prenatal visit (class: ambulatory)
Procedure:
2012-01-18: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2012-02-15: Prenatal visit (class: ambulatory)
Procedure:
2012-02-15: Streptococcus pneumoniae group B antigen test
------------------------------------------------------------------------------------------
Encounter:
2012-02-29: Obstetric emergency hospital admission (class: emergency)
Procedure:
2012-02-29: Episiotomy
------------------------------------------------------------------------------------------
Encounter:
2012-04-11: Postnatal visit (class: ambulatory)
Procedure:
2012-04-11: Physical examination following birth
------------------------------------------------------------------------------------------
Encounter:
2012-05-13: Encounter for symptom (class: outpatient)
Condition:
2012-05-13 -- 2013-11-13: Otitis media
Medications:
2012-05-13 -- 2012-05-27: Cefuroxime 250 MG Oral Tablet
------------------------------------------------------------------------------------------
Encounter:
2012-07-18: Prenatal initial visit (class: ambulatory)
Condition:
2012-07-18 -- 2012-08-08: Normal pregnancy
Procedure:
2012-07-18: Standard pregnancy test
------------------------------------------------------------------------------------------
Encounter:
2012-08-01: Patient-initiated encounter (class: ambulatory)
Procedure:
2012-08-01: Counseling for termination of pregnancy
------------------------------------------------------------------------------------------
Encounter:
2012-08-08: Prenatal visit (class: ambulatory)
Procedure:
2012-08-08: Pregnancy termination care
------------------------------------------------------------------------------------------
Encounter:
2012-09-24: Encounter for symptom (class: ambulatory)
Condition:
2012-09-24 -- 2012-11-01: Escherichia coli urinary tract infection
Medications:
2012-10-25 -- 2012-11-01: Nitrofurantoin 5 MG/ML Oral Suspension
Care Plans:
2012-10-25 -- 2012-11-01: Urinary tract infection care
------------------------------------------------------------------------------------------
Encounter:
2013-01-09: Prenatal initial visit (class: ambulatory)
Condition:
2013-01-09 -- 2013-08-14: Normal pregnancy
Care Plans:
2013-01-09 -- 2013-08-14: Routine antenatal care
Procedure:
2013-01-09: Standard pregnancy test
------------------------------------------------------------------------------------------
Encounter:
2013-01-07: Encounter for symptom (class: ambulatory)
Condition:
2013-01-07 -- 2013-01-21: Viral sinusitis (disorder)
------------------------------------------------------------------------------------------
Encounter:
2013-02-06: Prenatal visit (class: ambulatory)
Procedure:
2013-02-06: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2013-03-06: Prenatal visit (class: ambulatory)
Procedure:
2013-03-06: Fetal anatomy study
------------------------------------------------------------------------------------------
Encounter:
2013-04-03: Prenatal visit (class: ambulatory)
Procedure:
2013-04-03: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2013-05-01: Prenatal visit (class: ambulatory)
Procedure:
2013-05-01: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2013-05-29: Prenatal visit (class: ambulatory)
Procedure:
2013-05-29: Hemoglobin / Hematocrit / Platelet count
------------------------------------------------------------------------------------------
Encounter:
2013-06-26: Prenatal visit (class: ambulatory)
Procedure:
2013-06-26: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2013-07-24: Prenatal visit (class: ambulatory)
Procedure:
2013-07-24: Streptococcus pneumoniae group B antigen test
------------------------------------------------------------------------------------------
Encounter:
2013-08-07: Prenatal visit (class: ambulatory)
Procedure:
2013-08-07: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter:
2013-08-14: Obstetric emergency hospital admission (class: emergency)
Procedure:
2013-08-14: Childbirth
------------------------------------------------------------------------------------------
Encounter:
2013-09-25: Postnatal visit (class: ambulatory)
Procedure:
2013-09-25: Physical examination following birth
------------------------------------------------------------------------------------------
Encounter:
2013-11-13: General examination of patient (procedure) (class: wellness)
Observations:
2013-11-13 18:19:08: Body Height: 162.4 cm
2013-11-13 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 4.0 {score}
2013-11-13 18:19:08: Body Weight: 76.0 kg
2013-11-13 18:19:08: Body Mass Index: 28.8 kg/m2
2013-11-13 18:19:08: Diastolic Blood Pressure: 83.0 mm[Hg]
2013-11-13 18:19:08: Systolic Blood Pressure: 123.0 mm[Hg]
2013-11-13 18:19:08: Heart rate: 97.0 /min
2013-11-13 18:19:08: Respiratory rate: 15.0 /min
2013-11-13 18:19:08: Total Cholesterol: 188.8 mg/dL
2013-11-13 18:19:08: Triglycerides: 144.6 mg/dL
2013-11-13 18:19:08: Low Density Lipoprotein Cholesterol: 81.4 mg/dL
2013-11-13 18:19:08: High Density Lipoprotein Cholesterol: 78.5 mg/dL
2013-11-13 18:19:08: Tobacco smoking status NHIS: Never smoker
Immunization:
2013-11-13: Influenza seasonal injectable preservative free
------------------------------------------------------------------------------------------
Encounter:
2014-01-23: Encounter for symptom (class: ambulatory)
Condition:
2014-01-23 -- 2014-01-30: Viral sinusitis (disorder)
------------------------------------------------------------------------------------------
Encounter:
2015-03-24: Encounter for symptom (class: ambulatory)
Observations:
2015-03-24 18:19:08: Body temperature: 37.8 Cel
Condition:
2015-03-24 -- 2015-04-03: Acute viral pharyngitis (disorder)
------------------------------------------------------------------------------------------
Encounter:
2015-11-28: Emergency room admission (procedure) (class: emergency)
Condition:
2015-11-28 -- 2016-01-02: Whiplash injury to neck
Medications:
2015-11-28 -- 2016-01-02: Naproxen sodium 220 MG Oral Tablet
Care Plans:
2015-11-28 -- 2016-01-02: Musculoskeletal care
------------------------------------------------------------------------------------------
Encounter:
2016-09-09: Consultation for treatment (class: outpatient)
Medications:
2016-09-09 -- 2017-09-04: Errin 28 Day Pack
------------------------------------------------------------------------------------------
Encounter:
2016-11-03: Encounter for symptom (class: ambulatory)
Condition:
2016-11-03 -- 2016-11-17: Acute bronchitis (disorder)
Medications:
2016-11-03 -- 2016-11-17: Acetaminophen 325 MG Oral Tablet
Care Plans:
2016-11-03 -- 2019-11-20: Respiratory therapy
Procedure:
2016-11-03: Sputum examination (procedure)
------------------------------------------------------------------------------------------
Encounter:
2016-11-16: General examination of patient (procedure) (class: wellness)
Observations:
2016-11-16 18:19:08: Body Height: 162.4 cm
2016-11-16 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 2.0 {score}
2016-11-16 18:19:08: Body Weight: 79.1 kg
2016-11-16 18:19:08: Body Mass Index: 30.0 kg/m2
2016-11-16 18:19:08: Diastolic Blood Pressure: 73.0 mm[Hg]
2016-11-16 18:19:08: Systolic Blood Pressure: 117.0 mm[Hg]
2016-11-16 18:19:08: Heart rate: 77.0 /min
2016-11-16 18:19:08: Respiratory rate: 16.0 /min
2016-11-16 18:19:08: Total Cholesterol: 177.5 mg/dL
2016-11-16 18:19:08: Triglycerides: 123.1 mg/dL
2016-11-16 18:19:08: Low Density Lipoprotein Cholesterol: 88.3 mg/dL
2016-11-16 18:19:08: High Density Lipoprotein Cholesterol: 64.6 mg/dL
2016-11-16 18:19:08: Leukocytes [#/volume] in Blood by Automated count: 4.2 10*3/uL
2016-11-16 18:19:08: Erythrocytes [#/volume] in Blood by Automated count: 4.7 10*6/uL
2016-11-16 18:19:08: Hemoglobin [Mass/volume] in Blood: 17.0 g/dL
2016-11-16 18:19:08: Hematocrit [Volume Fraction] of Blood by Automated count: 40.2 %
2016-11-16 18:19:08: MCV [Entitic volume] by Automated count: 94.6 fL
2016-11-16 18:19:08: MCH [Entitic mass] by Automated count: 31.4 pg
2016-11-16 18:19:08: MCHC [Mass/volume] by Automated count: 35.7 g/dL
2016-11-16 18:19:08: Erythrocyte distribution width [Entitic volume] by Automated count: 40.0 fL
2016-11-16 18:19:08: Platelets [#/volume] in Blood by Automated count: 388.0 10*3/uL
2016-11-16 18:19:08: Platelet distribution width [Entitic volume] in Blood by Automated count: 244.0 fL
2016-11-16 18:19:08: Platelet mean volume [Entitic volume] in Blood by Automated count: 9.9 fL
2016-11-16 18:19:08: Tobacco smoking status NHIS: Never smoker
Condition:
2016-11-16 -- : Body mass index 30+ - obesity (finding)
Immunization:
2016-11-16: Influenza seasonal injectable preservative free
------------------------------------------------------------------------------------------
Encounter:
2017-09-04: Consultation for treatment (class: outpatient)
------------------------------------------------------------------------------------------
Encounter:
2017-09-11: Admission to surgical department (class: inpatient)
Care Plans:
2017-09-11 -- 2017-09-25: Minor surgery care management (procedure)
Procedure:
2017-09-11: Bilateral tubal ligation
------------------------------------------------------------------------------------------
Encounter:
2017-12-17: Encounter for symptom (class: ambulatory)
Condition:
2017-12-17 -- 2017-12-24: Viral sinusitis (disorder)
------------------------------------------------------------------------------------------
Encounter:
2019-11-20: General examination of patient (procedure) (class: wellness)
Observations:
2019-11-20 18:19:08: Body Height: 162.4 cm
2019-11-20 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 1.0 {score}
2019-11-20 18:19:08: Body Weight: 77.5 kg
2019-11-20 18:19:08: Body Mass Index: 29.4 kg/m2
2019-11-20 18:19:08: Diastolic Blood Pressure: 79.0 mm[Hg]
2019-11-20 18:19:08: Systolic Blood Pressure: 127.0 mm[Hg]
2019-11-20 18:19:08: Heart rate: 77.0 /min
2019-11-20 18:19:08: Respiratory rate: 12.0 /min
2019-11-20 18:19:08: Total Cholesterol: 187.6 mg/dL
2019-11-20 18:19:08: Triglycerides: 142.3 mg/dL
2019-11-20 18:19:08: Low Density Lipoprotein Cholesterol: 97.1 mg/dL
2019-11-20 18:19:08: High Density Lipoprotein Cholesterol: 62.0 mg/dL
2019-11-20 18:19:08: Tobacco smoking status NHIS: Never smoker
Immunization:
2019-11-20: Influenza seasonal injectable preservative free
Procedure:
2019-11-20: Medication Reconciliation (procedure)
------------------------------------------------------------------------------------------
# Per-patient subsets of every care table for the selected patient.
encounters, observations, conditions, medications, careplans, immunizations, procedures = care_data_collection(patientID)

# Tag each table with the label used as its row/legend name in the timeline.
# `assign` returns a new frame instead of writing into a filtered slice of the
# shared tables, which is what triggers pandas' SettingWithCopyWarning.
encounters = encounters.assign(featureName='Encounters')
conditions = conditions.assign(featureName='Conditions')
medications = medications.assign(featureName='Medications')
careplans = careplans.assign(featureName='Careplans')
immunizations = immunizations.assign(featureName='Immunizations')
procedures = procedures.assign(featureName='Procedures')
/tmp/ipykernel_91928/1069656180.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy encounters['featureName'] = ['Encounters'] * encounters.shape[0] /tmp/ipykernel_91928/1069656180.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy conditions['featureName'] = ['Conditions'] * conditions.shape[0] /tmp/ipykernel_91928/1069656180.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy medications['featureName'] = ['Medications'] * medications.shape[0] /tmp/ipykernel_91928/1069656180.py:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy careplans['featureName'] = ['Careplans'] * careplans.shape[0] /tmp/ipykernel_91928/1069656180.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. 
Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy immunizations['featureName'] = ['Immunizations'] * immunizations.shape[0] /tmp/ipykernel_91928/1069656180.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy procedures['featureName'] = ['Procedures'] * procedures.shape[0]
# Immunizations and procedures carry a single DATE column; expose it under
# the same START name the interval tables use.
date_to_start = {"DATE": "START"}
immunizations = immunizations.rename(columns=date_to_start)
procedures = procedures.rename(columns=date_to_start)

# Columns shared by the interval-style tables, and the subset available for
# the point-in-time tables (no STOP).
col_toUse = ['START', 'STOP', 'DESCRIPTION', 'featureName']
point_cols = ['START', 'DESCRIPTION', 'featureName']

encounters_toUse = encounters.loc[:, col_toUse + ['ENCOUNTERCLASS']]
conditions_toUse = conditions.loc[:, col_toUse]
careplans_toUse = careplans.loc[:, col_toUse]
medications_toUse = medications.loc[:, col_toUse]
immunizations_toUse = immunizations.loc[:, point_cols]
procedures_toUse = procedures.loc[:, point_cols]

# Rows drawn as bars on the timeline; missing values become empty strings.
timeline_parts = [medications_toUse, careplans_toUse, conditions_toUse]
df_toUse = pd.concat(timeline_parts).fillna('')
import plotly.express as px
# Bars for medications, care plans and conditions, one row per feature type.
timeline_rows = df_toUse.sort_values('START')
fig = px.timeline(
    timeline_rows,
    x_start="START",
    x_end="STOP",
    y="featureName",
    text="DESCRIPTION",
    color="featureName",
    width=2000,
    height=300,
)

# Dashed vertical line colour for each encounter class; encounters of any
# other class get no line.
encounter_line_colors = {
    'urgentcare': "orange",
    'emergency': "red",
    'inpatient': "yellow",
    'outpatient': "cyan",
    'ambulatory': "lightblue",
    'wellness': "lightgreen",
}
encounter_marks = zip(
    encounters_toUse['START'].values,
    encounters_toUse['ENCOUNTERCLASS'].values,
)
for encounter_start, encounter_class in encounter_marks:
    line_color = encounter_line_colors.get(encounter_class)
    if line_color is not None:
        fig.add_vline(x=encounter_start, line_width=1, line_dash="dash", line_color=line_color)
    # Label every encounter with its class at the top of the plot area.
    fig.add_annotation(x=encounter_start, y=1, yref="paper", text=encounter_class)

# Immunizations and procedures are single dates, drawn as markers on their own rows.
fig.add_scatter(
    x=immunizations_toUse['START'],
    y=['Immunizations'] * immunizations_toUse.shape[0],
    mode="markers",
    name="Immunizations",
    marker=dict(size=10),
)
fig.add_scatter(
    x=procedures_toUse['START'],
    y=['Procedures'] * procedures_toUse.shape[0],
    mode="markers",
    name="Procedures",
    marker=dict(size=10),
)

fig.update_layout(font=dict(family="Courier New, monospace", size=10))
fig.update_yaxes(title='', showticklabels=True)
fig.show()
# Count each condition at most once per patient: when the same condition is
# recorded for a patient on several encounters, keep only its first row.
conditions_byPatient = (
    df['conditions']
    .drop_duplicates(subset=['PATIENT', 'DESCRIPTION'], keep='first')
    .reset_index(drop=True)
)

# Conditions ordered by the number of patients who had them (most frequent first).
condition_rank = (
    conditions_byPatient['DESCRIPTION']
    .value_counts()
    .rename_axis('Conditions')
    .reset_index(name='Freq')
)
condition_rank.head(3)
| | Conditions | Freq |
|---|---|---|
| 0 | Viral sinusitis (disorder) | 743 |
| 1 | Acute viral pharyngitis (disorder) | 492 |
| 2 | Acute bronchitis (disorder) | 464 |
import holoviews as hv
# Render holoviews objects with the bokeh backend.
hv.extension('bokeh')

# Bar chart built from the condition ranking table.
bar_options = {
    'width': 2000,
    'height': 800,
    'xrotation': 90,  # rotate the x tick labels by 90 degrees
    'title': 'Conditions Rank',
    'ylabel': 'Amount',
}
bars = hv.Bars(data=condition_rank)
bars.opts(**bar_options)
# The three most frequent conditions.
top3 = list(condition_rank.head(3)['Conditions'].values)

# One row per (patient, condition) for those conditions. Copy the selection so
# the demographic columns are added to an independent frame rather than to a
# slice of `conditions_byPatient` (which raises SettingWithCopyWarning).
conditions_top3 = conditions_byPatient[conditions_byPatient['DESCRIPTION'].isin(top3)].copy()

# Patients indexed by Id (first row wins if an Id repeats) so each demographic
# attribute is fetched with one vectorised lookup instead of one table filter
# per row. Patients missing from the table yield NaN.
_patients_by_id = df['patients'].drop_duplicates(subset='Id', keep='first').set_index('Id')
for _new_col, _source_col in (
    ('race', 'RACE'),
    ('ethnicity', 'ETHNICITY'),
    ('gender', 'GENDER'),
    ('ifMarital', 'MARITAL'),
):
    conditions_top3[_new_col] = conditions_top3['PATIENT'].map(_patients_by_id[_source_col])

# Any marital status still missing is reported as 'Unknown'.
conditions_top3['ifMarital'] = conditions_top3['ifMarital'].fillna('Unknown')
/tmp/ipykernel_91928/817431530.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_91928/817431530.py:15: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_91928/817431530.py:16: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_91928/817431530.py:17: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_91928/817431530.py:18: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Every recorded occurrence of the top-3 conditions (repeat occurrences for the
# same patient are kept). Copy so the new columns are not written into a
# filtered slice of df['conditions'] (which raises SettingWithCopyWarning).
conditions_top3_age = df['conditions'][df['conditions']['DESCRIPTION'].isin(top3)].copy()

# Birth year per patient Id, computed once for the whole patients table rather
# than by filtering that table for every condition row. The first row is used
# if an Id repeats; patients missing from the table yield NaN.
_patients_once = df['patients'].drop_duplicates(subset='Id', keep='first')
_birth_year_lookup = dict(zip(
    _patients_once['Id'],
    pd.DatetimeIndex(_patients_once['BIRTHDATE']).year,
))

conditions_top3_age['birthYear'] = conditions_top3_age['PATIENT'].map(_birth_year_lookup)
conditions_top3_age['conditionYear'] = pd.DatetimeIndex(conditions_top3_age['START']).year

# Age at condition start as a calendar-year difference (it can be one higher
# than the completed-years age when the start precedes that year's birthday).
conditions_top3_age['age'] = conditions_top3_age['conditionYear'] - conditions_top3_age['birthYear']
/tmp/ipykernel_91928/3164922842.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_91928/3164922842.py:9: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_91928/3164922842.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
from plotly.subplots import make_subplots
import plotly.graph_objects as go
def _new_pie_row():
    """Return a 1x3 figure of pie ('domain') subplots titled with `top3`."""
    return make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'}] * 3], subplot_titles=(top3))


# fig1-fig4: one pie per condition for a demographic column of conditions_top3.
# fig5: one age histogram per condition.
fig1 = _new_pie_row()
fig2 = _new_pie_row()
fig3 = _new_pie_row()
fig4 = _new_pie_row()
fig5 = make_subplots(rows=1, cols=3, subplot_titles=(top3))

# Column of conditions_top3 -> figure that displays its distribution.
demographic_figures = {
    'gender': fig1,
    'race': fig2,
    'ethnicity': fig3,
    'ifMarital': fig4,
}

for position, condition in enumerate(top3[:3], start=1):
    cond_df = conditions_top3[conditions_top3['DESCRIPTION'] == condition]
    for column, figure in demographic_figures.items():
        # Read labels and counts straight from the value_counts() Series.
        # Going through reset_index() and the 'index' / <column> names breaks
        # on pandas >= 2.0, where those columns are named <column> / 'count'.
        counts = cond_df[column].value_counts()
        figure.add_trace(
            go.Pie(values=counts.tolist(), labels=counts.index.tolist()),
            row=1, col=position)
    ages = conditions_top3_age[conditions_top3_age['DESCRIPTION'] == condition]['age'].values
    fig5.add_trace(
        go.Histogram(
            x=ages,
            # Fixed 10-year bins from 0 to 100 so the three panels are comparable.
            xbins=dict(start=0, end=100, size=10),
            autobinx=False,
        ),
        row=1, col=position)

fig1.update_layout(title_text="Gender")
fig1.show()
fig2.update_layout(title_text="Race")
fig2.show()
fig3.update_layout(title_text="Ethnicity")
fig3.show()
fig4.update_layout(title_text="Marital Status")
fig4.show()
fig5.update_layout(width=1000, height=450, showlegend=False, title_text="Age")
fig5.show()
def _first_description(table, patient, encounter, default='None'):
    """Return DESCRIPTION of the first `table` row for this patient and encounter.

    `default` is returned when the table has no row for the pair.
    """
    rows = table[(table['PATIENT'] == patient) & (table['ENCOUNTER'] == encounter)]
    if rows.shape[0] == 0:
        return default
    return rows['DESCRIPTION'].values[0]


# For every condition record, look up the class of the encounter it was
# recorded in and the first medication / care plan attached to that encounter.
encounterClass_eachCase = []
medication_eachCase = []
careplan_eachCase = []
# immunization_eachCase = []
for patient, encounter in zip(conditions_top3_age['PATIENT'].values,
                              conditions_top3_age['ENCOUNTER'].values):
    el = df['encounters'][(df['encounters']['PATIENT'] == patient)
                          & (df['encounters']['Id'] == encounter)]
    # Raises IndexError if the condition points at an encounter that is not in
    # the encounters table.
    encounterClass_eachCase.append(el['ENCOUNTERCLASS'].values[0])
    medication_eachCase.append(_first_description(df['medications'], patient, encounter))
    careplan_eachCase.append(_first_description(df['careplans'], patient, encounter))
    # Immunizations are currently disabled; they would be looked up the same way:
    # immunization_eachCase.append(_first_description(df['immunizations'], patient, encounter))

# Work on an independent frame so the assignments below do not write to a
# slice of df['conditions'] (the source of SettingWithCopyWarning).
conditions_top3_age = conditions_top3_age.copy()
conditions_top3_age['encounterClass'] = encounterClass_eachCase
conditions_top3_age['medication'] = medication_eachCase
conditions_top3_age['careplan'] = careplan_eachCase
# conditions_top3_age['immunization'] = immunization_eachCase
/tmp/ipykernel_55933/1431142842.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_55933/1431142842.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /tmp/ipykernel_55933/1431142842.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# set(immunization_eachCase)
# One pie per condition in `top3` for the encounter class (fig6), the
# medication (fig7) and the care plan (fig8) recorded with each case.
# fig7 has no subplot titles.
fig6 = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'}] * 3], subplot_titles=(top3))
fig7 = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'}] * 3])
fig8 = make_subplots(rows=1, cols=3, specs=[[{'type': 'domain'}] * 3], subplot_titles=(top3))

# Column of conditions_top3_age -> figure that displays its distribution.
treatment_figures = {
    'encounterClass': fig6,
    'medication': fig7,
    'careplan': fig8,
}

for position, condition in enumerate(top3[:3], start=1):
    cond_df = conditions_top3_age[conditions_top3_age['DESCRIPTION'] == condition]
    for column, figure in treatment_figures.items():
        # Use the value_counts() Series directly. The column names produced by
        # reset_index() changed in pandas 2.0 ('index' no longer exists).
        counts = cond_df[column].value_counts()
        figure.add_trace(
            go.Pie(values=counts.tolist(), labels=counts.index.tolist()),
            row=1, col=position)

fig6.update_layout(title_text="Encounter Class")
fig6.show()
fig7.update_layout(width=900, height=900, title_text="Medications",
                   legend=dict(title_font_family="Times New Roman",
                               font=dict(size=10),
                               orientation="h"))
fig7.show()
fig8.update_layout(width=1050, height=600, title_text="Care Plans",
                   legend=dict(title_font_family="Times New Roman",
                               font=dict(size=10)))
fig8.show()
# Disabled check: rows of the patients table that have a DEATHDATE.
# df['patients'][df['patients']['DEATHDATE'].notna()]
# Number of care plans per DESCRIPTION, most frequent first; the top entry is
# the one analysed below.
df['careplans']['DESCRIPTION'].value_counts()
Respiratory therapy 633 Routine antenatal care 397 Diabetes self management plan 339 Lifestyle education regarding hypertension 302 Physical therapy procedure 230 Self-care interventions (procedure) 189 Musculoskeletal care 187 Wound care 185 Fracture care 169 Hyperlipidemia clinical management plan 136 Head injury rehabilitation 102 Urinary tract infection care 76 Inpatient care plan (record artifact) 59 Asthma self management 57 Heart failure self management plan 57 Cancer care plan 49 Care plan (record artifact) 46 Burn care 37 Minor surgery care management (procedure) 31 Demential management 29 Allergic disorder monitoring 29 Chronic obstructive pulmonary disease clinical management plan 27 Skin condition care 25 Care Plan 24 Overactivity/inattention behavior management 20 Major surgery care management 15 Terminal care 15 Psychiatry care plan 7 Mental health care plan 5 Dialysis care plan (record artifact) 3 Major depressive disorder clinical management plan 2 Spinal cord injury rehabilitation 1 Name: DESCRIPTION, dtype: int64
from datetime import datetime, timedelta
from collections import OrderedDict
# datetime.strptime(df['careplans']['STOP'].values[0], "%Y-%m-%d")-datetime.strptime(df['careplans']['START'].values[0], "%Y-%m-%d")
# Care plans with the most frequent description in the table above.
careplan_top1 = df['careplans'].loc[df['careplans']['DESCRIPTION'] == 'Respiratory therapy']
# pd.set_option('display.max_rows', 1000)

# Disabled experiment: mean duration of these care plans. It is kept as real
# comments; as a bare triple-quoted string it was still evaluated as an
# expression and echoed back as the cell's output.
# careDays = []
# for i in range(careplan_top1.shape[0]):
#     try:
#         diff = datetime.strptime(careplan_top1['STOP'].values[i], "%Y-%m-%d") - datetime.strptime(careplan_top1['START'].values[i], "%Y-%m-%d")
#         careDays.append(diff)
#     except:
#         careDays.append(np.nan)
# careplan_top1['careDays'] = careDays
# np.mean(careplan_top1['careDays'])
'\ncareDays = []\nfor i in range(careplan_top1.shape[0]):\n try: \n diff = datetime.strptime(careplan_top1[\'STOP\'].values[i], "%Y-%m-%d") - datetime.strptime(careplan_top1[\'START\'].values[i], "%Y-%m-%d")\n careDays.append(diff)\n except:\n careDays.append(np.nan)\ncareplan_top1[\'careDays\'] = careDays\nnp.mean(careplan_top1[\'careDays\'])\n'
def _months_covered(start, end):
    """Return the distinct '%b%Y' labels (e.g. 'Jan2005') of the days in [start, end).

    Labels are in chronological order. The end day itself is excluded, so a
    plan that starts and stops on the same day covers no month.
    """
    day_offsets = range((end - start).days)
    labels = ((start + timedelta(days=offset)).strftime(r"%b%Y") for offset in day_offsets)
    return list(dict.fromkeys(labels))


# Only plans with a recorded end date have a measurable span.
careplan_top1 = careplan_top1[careplan_top1['STOP'] != '']

# One entry per (care plan, month the plan was active in).
# Only month labels are collected. Previously the plan durations (timedelta)
# and NaN were appended too and then removed with `len(str(x)) == 7`; a
# zero-length plan prints as '0:00:00' (7 characters), survived that filter
# and broke the '%b%Y' parsing below.
monthList = []
for start_text, stop_text in zip(careplan_top1['START'].values, careplan_top1['STOP'].values):
    try:
        start = datetime.strptime(start_text, "%Y-%m-%d")
        end = datetime.strptime(stop_text, "%Y-%m-%d")
    except (TypeError, ValueError):
        # Missing (non-string) or malformed date: the plan cannot be placed on
        # the calendar, so it contributes nothing.
        continue
    monthList.extend(_months_covered(start, end))

# Number of active plans per month label.
amount_byMonth = (
    pd.Series(monthList, name='CareDate', dtype=object)
    .value_counts()
    .rename_axis('CareDate')
    .reset_index(name='Amount')
)
parsed_months = [datetime.strptime(label, '%b%Y') for label in amount_byMonth['CareDate']]
amount_byMonth['Year'] = [month.year for month in parsed_months]
amount_byMonth['Month'] = [month.month for month in parsed_months]
# Month labels for every day from 2005-01-01 up to (not including) 2020-04-30,
# in chronological order. Merging the counts onto this calendar gives months
# without any care an explicit Amount of 0.
start = datetime.strptime('2005-01-01', "%Y-%m-%d")
end = datetime.strptime('2020-04-30', "%Y-%m-%d")
mon = list(dict.fromkeys(
    (start + timedelta(days=offset)).strftime(r"%b%Y")
    for offset in range((end - start).days)
))
amount = pd.DataFrame(mon, columns=['CareDate']).merge(
    amount_byMonth[['CareDate', 'Amount']], how='left', on='CareDate')
amount = amount.fillna(0)

# Normalize data (z-score).
# NOTE(review): mu and sd are computed over the whole series, including the
# 12 values held out for testing below - confirm this leakage is acceptable.
mu = amount['Amount'].mean()
sd = amount['Amount'].std()
amount_norm = amount.copy()
amount_norm['Amount'] = (amount['Amount'] - mu) / sd

# Length of each input window. Despite the name the unit is months, because
# `amount_norm` holds one row per month.
train_days = 6  # months

# Sliding windows: `train_days` consecutive values predict the value after them.
# Positional access on the raw array replaces the label-based
# `amount_norm['Amount'][i]` lookup and the separate manual counter.
series = amount_norm['Amount'].to_numpy()
window_starts = range(max(len(series) - train_days, 0))
x = [list(series[first:first + train_days]) for first in window_starts]
y = [series[first + train_days] for first in window_starts]

X = np.expand_dims(np.array(x), -1)  # trailing axis of size 1 for the LSTM input
Y = np.array(y)

# Chronological split: the last 12 windows are the test set.
train_num = X.shape[0] - 12
X_train, X_test = X[:train_num], X[train_num:]
Y_train, Y_test = Y[:train_num], Y[train_num:]
from tensorflow.keras.layers import Input, Dense, LeakyReLU, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import regularizers
from tensorflow.keras.layers import LSTM
from tensorflow.keras.regularizers import l1, l2
# Forecasting network: a window of past (normalised) monthly amounts goes
# through one L2-regularised LSTM layer with dropout, then a single linear
# output unit.
seq_input = Input(shape=X_train.shape[1:])
lstm_layer = LSTM(
    128,
    return_sequences=False,
    kernel_regularizer=l2(0.002),
    recurrent_regularizer=l2(0.002),
    bias_regularizer=l2(0.002),
)
x = lstm_layer(seq_input)
x = Dropout(0.2)(x)
out = Dense(1, activation='linear')(x)
net = Model(inputs=seq_input, outputs=out)
net.summary()
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 6, 1)] 0
lstm_2 (LSTM) (None, 128) 66560
dropout_2 (Dropout) (None, 128) 0
dense_2 (Dense) (None, 1) 129
=================================================================
Total params: 66,689
Trainable params: 66,689
Non-trainable params: 0
_________________________________________________________________
# Train with mean squared error. Stop once the validation loss has not
# improved for 10 epochs, and keep the weights of the best epoch on disk.
net.compile(optimizer=Adam(0.001), loss='mse')
es = EarlyStopping(monitor='val_loss', mode='min', patience=10, verbose=1)
bm = ModelCheckpoint('../net_weights.hdf5', monitor='val_loss', mode='min',
                     save_best_only=True)
fit_options = dict(
    epochs=200,
    batch_size=8,
    validation_split=0.1,
    callbacks=[es, bm],
)
net.fit(X_train, Y_train, **fit_options)
Epoch 1/200 19/19 [==============================] - 3s 42ms/step - loss: 0.9256 - val_loss: 0.6269 Epoch 2/200 19/19 [==============================] - 0s 12ms/step - loss: 0.5790 - val_loss: 0.5032 Epoch 3/200 19/19 [==============================] - 0s 11ms/step - loss: 0.4838 - val_loss: 0.4835 Epoch 4/200 19/19 [==============================] - 0s 10ms/step - loss: 0.4363 - val_loss: 0.4440 Epoch 5/200 19/19 [==============================] - 0s 9ms/step - loss: 0.4022 - val_loss: 0.4198 Epoch 6/200 19/19 [==============================] - 0s 10ms/step - loss: 0.3821 - val_loss: 0.3966 Epoch 7/200 19/19 [==============================] - 0s 10ms/step - loss: 0.3628 - val_loss: 0.3749 Epoch 8/200 19/19 [==============================] - 0s 9ms/step - loss: 0.3458 - val_loss: 0.3638 Epoch 9/200 19/19 [==============================] - 0s 9ms/step - loss: 0.3263 - val_loss: 0.3506 Epoch 10/200 19/19 [==============================] - 0s 9ms/step - loss: 0.3146 - val_loss: 0.3381 Epoch 11/200 19/19 [==============================] - 0s 9ms/step - loss: 0.3034 - val_loss: 0.3200 Epoch 12/200 19/19 [==============================] - 0s 9ms/step - loss: 0.2910 - val_loss: 0.3167 Epoch 13/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2744 - val_loss: 0.3000 Epoch 14/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2637 - val_loss: 0.2955 Epoch 15/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2576 - val_loss: 0.2825 Epoch 16/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2432 - val_loss: 0.2623 Epoch 17/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2372 - val_loss: 0.2587 Epoch 18/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2248 - val_loss: 0.2399 Epoch 19/200 19/19 [==============================] - 0s 11ms/step - loss: 0.2206 - val_loss: 0.2287 Epoch 20/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2087 - val_loss: 
0.2198 Epoch 21/200 19/19 [==============================] - 0s 10ms/step - loss: 0.2003 - val_loss: 0.2131 Epoch 22/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1900 - val_loss: 0.2073 Epoch 23/200 19/19 [==============================] - 0s 9ms/step - loss: 0.1877 - val_loss: 0.2182 Epoch 24/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1834 - val_loss: 0.2021 Epoch 25/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1697 - val_loss: 0.1964 Epoch 26/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1640 - val_loss: 0.1877 Epoch 27/200 19/19 [==============================] - 0s 9ms/step - loss: 0.1643 - val_loss: 0.1765 Epoch 28/200 19/19 [==============================] - 0s 9ms/step - loss: 0.1540 - val_loss: 0.1939 Epoch 29/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1493 - val_loss: 0.1710 Epoch 30/200 19/19 [==============================] - 0s 12ms/step - loss: 0.1481 - val_loss: 0.1656 Epoch 31/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1390 - val_loss: 0.1654 Epoch 32/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1369 - val_loss: 0.1520 Epoch 33/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1305 - val_loss: 0.1577 Epoch 34/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1279 - val_loss: 0.1517 Epoch 35/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1231 - val_loss: 0.1463 Epoch 36/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1174 - val_loss: 0.1362 Epoch 37/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1151 - val_loss: 0.1390 Epoch 38/200 19/19 [==============================] - 0s 8ms/step - loss: 0.1094 - val_loss: 0.1440 Epoch 39/200 19/19 [==============================] - 0s 12ms/step - loss: 0.1111 - val_loss: 0.1320 Epoch 40/200 19/19 [==============================] - 0s 11ms/step - loss: 
0.1049 - val_loss: 0.1279 Epoch 41/200 19/19 [==============================] - 0s 11ms/step - loss: 0.1026 - val_loss: 0.1305 Epoch 42/200 19/19 [==============================] - 0s 10ms/step - loss: 0.1034 - val_loss: 0.1242 Epoch 43/200 19/19 [==============================] - 0s 15ms/step - loss: 0.0969 - val_loss: 0.1200 Epoch 44/200 19/19 [==============================] - 0s 11ms/step - loss: 0.0965 - val_loss: 0.1182 Epoch 45/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0933 - val_loss: 0.1222 Epoch 46/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0906 - val_loss: 0.1075 Epoch 47/200 19/19 [==============================] - 0s 11ms/step - loss: 0.0886 - val_loss: 0.1037 Epoch 48/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0906 - val_loss: 0.1080 Epoch 49/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0849 - val_loss: 0.1072 Epoch 50/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0836 - val_loss: 0.1116 Epoch 51/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0800 - val_loss: 0.0997 Epoch 52/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0790 - val_loss: 0.1006 Epoch 53/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0749 - val_loss: 0.1022 Epoch 54/200 19/19 [==============================] - 0s 11ms/step - loss: 0.0772 - val_loss: 0.0941 Epoch 55/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0748 - val_loss: 0.1053 Epoch 56/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0748 - val_loss: 0.0965 Epoch 57/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0711 - val_loss: 0.0893 Epoch 58/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0720 - val_loss: 0.0922 Epoch 59/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0672 - val_loss: 0.0936 Epoch 60/200 19/19 [==============================] - 0s 
10ms/step - loss: 0.0654 - val_loss: 0.0913 Epoch 61/200 19/19 [==============================] - 0s 11ms/step - loss: 0.0644 - val_loss: 0.0902 Epoch 62/200 19/19 [==============================] - 0s 11ms/step - loss: 0.0628 - val_loss: 0.0890 Epoch 63/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0666 - val_loss: 0.0934 Epoch 64/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0605 - val_loss: 0.0946 Epoch 65/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0644 - val_loss: 0.0850 Epoch 66/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0621 - val_loss: 0.0855 Epoch 67/200 19/19 [==============================] - 0s 11ms/step - loss: 0.0586 - val_loss: 0.0826 Epoch 68/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0589 - val_loss: 0.0881 Epoch 69/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0575 - val_loss: 0.0839 Epoch 70/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0605 - val_loss: 0.0819 Epoch 71/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0613 - val_loss: 0.0898 Epoch 72/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0542 - val_loss: 0.0787 Epoch 73/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0569 - val_loss: 0.0775 Epoch 74/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0557 - val_loss: 0.0794 Epoch 75/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0555 - val_loss: 0.0780 Epoch 76/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0549 - val_loss: 0.0719 Epoch 77/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0566 - val_loss: 0.0772 Epoch 78/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0581 - val_loss: 0.0763 Epoch 79/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0535 - val_loss: 0.0741 Epoch 80/200 19/19 
[==============================] - 0s 8ms/step - loss: 0.0521 - val_loss: 0.0745 Epoch 81/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0530 - val_loss: 0.0744 Epoch 82/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0514 - val_loss: 0.0779 Epoch 83/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0539 - val_loss: 0.0790 Epoch 84/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0484 - val_loss: 0.0712 Epoch 85/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0492 - val_loss: 0.0715 Epoch 86/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0480 - val_loss: 0.0749 Epoch 87/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0487 - val_loss: 0.0790 Epoch 88/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0527 - val_loss: 0.0787 Epoch 89/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0579 - val_loss: 0.0721 Epoch 90/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0529 - val_loss: 0.0722 Epoch 91/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0550 - val_loss: 0.0708 Epoch 92/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0502 - val_loss: 0.0706 Epoch 93/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0459 - val_loss: 0.0764 Epoch 94/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0494 - val_loss: 0.0789 Epoch 95/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0508 - val_loss: 0.0692 Epoch 96/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0497 - val_loss: 0.0707 Epoch 97/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0476 - val_loss: 0.0775 Epoch 98/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0475 - val_loss: 0.0684 Epoch 99/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0487 - val_loss: 0.0744 Epoch 100/200 
19/19 [==============================] - 0s 8ms/step - loss: 0.0510 - val_loss: 0.0770 Epoch 101/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0508 - val_loss: 0.0744 Epoch 102/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0455 - val_loss: 0.0699 Epoch 103/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0484 - val_loss: 0.0760 Epoch 104/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0449 - val_loss: 0.0681 Epoch 105/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0455 - val_loss: 0.0687 Epoch 106/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0501 - val_loss: 0.0709 Epoch 107/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0457 - val_loss: 0.0747 Epoch 108/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0459 - val_loss: 0.0691 Epoch 109/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0462 - val_loss: 0.0690 Epoch 110/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0434 - val_loss: 0.0712 Epoch 111/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0466 - val_loss: 0.0714 Epoch 112/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0411 - val_loss: 0.0736 Epoch 113/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0462 - val_loss: 0.0700 Epoch 114/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0475 - val_loss: 0.0673 Epoch 115/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0456 - val_loss: 0.0681 Epoch 116/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0453 - val_loss: 0.0713 Epoch 117/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0432 - val_loss: 0.0776 Epoch 118/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0491 - val_loss: 0.0657 Epoch 119/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0491 - 
val_loss: 0.0708 Epoch 120/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0464 - val_loss: 0.0674 Epoch 121/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0451 - val_loss: 0.0706 Epoch 122/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0472 - val_loss: 0.0696 Epoch 123/200 19/19 [==============================] - 0s 8ms/step - loss: 0.0465 - val_loss: 0.0710 Epoch 124/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0467 - val_loss: 0.0748 Epoch 125/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0458 - val_loss: 0.0721 Epoch 126/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0446 - val_loss: 0.0668 Epoch 127/200 19/19 [==============================] - 0s 9ms/step - loss: 0.0466 - val_loss: 0.0748 Epoch 128/200 19/19 [==============================] - 0s 10ms/step - loss: 0.0480 - val_loss: 0.0694 Epoch 128: early stopping
<keras.callbacks.History at 0x7facf0404c40>
# Restore the weights written by the ModelCheckpoint callback before predicting.
net.load_weights(filepath='../net_weights.hdf5')
pred = net.predict(X_test)

# Window k predicts row k + train_days of `amount_norm`, and the test windows
# start at window `train_num`. This replaces the hard-coded row offset 172,
# which was only valid for one particular series length.
first_test_row = train_num + train_days
result = pd.DataFrame({
    'CareDate': list(amount_norm['CareDate'].values[first_test_row:]),
    # Undo the z-score so both columns are in the original units.
    'RealValue': Y_test * sd + mu,
    'Prediction': (pred * sd + mu).reshape(-1),
})
import matplotlib.pyplot as plt
# Predicted versus real amounts over the test months, on one wide axis.
fig, ax = plt.subplots(figsize=(50, 12))
for column, legend_label in (('Prediction', 'prediction'), ('RealValue', 'real value')):
    ax.plot(result['CareDate'], result[column],
            linewidth=3, marker='o', markersize=10, label=legend_label)
ax.set_xlabel('Date', fontsize=30)
ax.set_ylabel('Amount of Cares', fontsize=30)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=25)
ax.legend(fontsize=30)
ax.grid(True)
# Balanced data set for the mortality model: every patient with a DEATHDATE
# plus an equally sized random sample of the patients without one.
has_death_date = df['patients']['DEATHDATE'].notna()
# `.copy()` so that adding 'ifDead' below does not write to a slice of
# df['patients'] (the source of SettingWithCopyWarning).
patientD = df['patients'][has_death_date].copy()
patientA = df['patients'][~has_death_date]

# Raises ValueError if there are fewer patients without a DEATHDATE than with
# one, because the sample is drawn without replacement.
patientA_sub = patientA.sample(n=patientD.shape[0], replace=False, random_state=2).copy()
# Stand-in end date for sampled patients without a DEATHDATE; it is used for
# the 'age' computed later. Presumably the date of the analysis - confirm.
patientA_sub['DEATHDATE'] = patientA_sub['DEATHDATE'].fillna('2023-04-10')

# Label: 1 = patient has a DEATHDATE, 0 = patient has none.
patientA_sub['ifDead'] = 0
patientD['ifDead'] = 1
patient_toUse = pd.concat([patientA_sub, patientD])
/tmp/ipykernel_91928/3939563891.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Display the available columns; the next cell selects a subset of them.
patient_toUse.columns
Index(['Id', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX',
'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY',
'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP',
'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE', 'Name',
'Address', 'ifDead'],
dtype='object')
# Columns carried forward: identifier, the two dates, demographics, location,
# the two healthcare cost columns and the label.
patient_col = [
    'Id', 'BIRTHDATE', 'DEATHDATE',
    'MARITAL', 'RACE', 'ETHNICITY', 'GENDER',
    'LAT', 'LON',
    'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE',
    'ifDead',
]
patient_toUse = patient_toUse.loc[:, patient_col].reset_index(drop=True)

# Age as a difference of calendar years between DEATHDATE and BIRTHDATE.
birth_years = pd.DatetimeIndex(patient_toUse['BIRTHDATE']).year
end_years = pd.DatetimeIndex(patient_toUse['DEATHDATE']).year
patient_toUse['age'] = end_years - birth_years
# encounterClass = list(set(df['encounters']['ENCOUNTERCLASS']))
# encounterClass

# Per-patient usage features, one entry per row of `patient_toUse`:
#   encounter_count       number of encounters per encounter class
#   medication_dispenses  total of DISPENSES over the patient's medications
#   immunization_record   number of immunization rows
#   care_record           (care plans without STOP, total days of finished plans)
#   procedure_record      number of procedure rows

# Built once instead of once per patient. It uses the same list(set(...))
# expression that later labels the encounter-count columns, so the order of
# the counts matches those labels.
encounter_classes = list(set(df['encounters']['ENCOUNTERCLASS']))

encounter_count = []
medication_dispenses = []
immunization_record = []
care_record = []
procedure_record = []
for patient in patient_toUse['Id']:
    # Filter by patient once, then count per class on the filtered rows. The
    # former chained `df[mask_patient][mask_class]` used a class mask built on
    # the full table ("Boolean Series key will be reindexed" warning).
    patient_encounters = df['encounters'][df['encounters']['PATIENT'] == patient]
    encounter_count.append(
        [int((patient_encounters['ENCOUNTERCLASS'] == ec).sum()) for ec in encounter_classes]
    )

    # The sum of no rows is already 0. The former `med_info.shape != 0` guard
    # compared a tuple with an int and was therefore always true.
    med_info = df['medications'][df['medications']['PATIENT'] == patient]
    medication_dispenses.append(sum(med_info['DISPENSES'].values))

    imm_info = df['immunizations'][df['immunizations']['PATIENT'] == patient]
    immunization_record.append(imm_info.shape[0])

    # Plans with an empty STOP are only counted; finished plans contribute
    # their duration in days. A patient without care plans yields (0, 0).
    care_info = df['careplans'][df['careplans']['PATIENT'] == patient]
    is_open = care_info['STOP'] == ''
    finished = care_info[~is_open]
    care_days = sum(
        (datetime.strptime(stop, "%Y-%m-%d") - datetime.strptime(start, "%Y-%m-%d")).days
        for start, stop in zip(finished['START'].values, finished['STOP'].values)
    )
    care_record.append((int(is_open.sum()), care_days))

    procedure_info = df['procedures'][df['procedures']['PATIENT'] == patient]
    procedure_record.append(procedure_info.shape[0])
/tmp/ipykernel_91928/1428586908.py:10: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
# Attach the per-patient usage features. Every list holds one entry per row
# of `patient_toUse`, in row order.
scalar_features = {
    'medicationDispenses': medication_dispenses,
    'immunizationRecord': immunization_record,
    'procedureRecord': procedure_record,
}
for feature_name, feature_values in scalar_features.items():
    patient_toUse[feature_name] = feature_values

# One column per encounter class and two columns for the care plan summary.
encounter_frame = pd.DataFrame(
    encounter_count,
    columns=list(set(df['encounters']['ENCOUNTERCLASS'])),
)
careplan_frame = pd.DataFrame(
    care_record,
    columns=['longtermCareplan(times)', 'shorttermCareplan(days)'],
)
patient_toUse = patient_toUse.join(encounter_frame).join(careplan_frame)

# The identifier and the raw dates are not model inputs.
col_toDrop = ['Id', 'BIRTHDATE', 'DEATHDATE']
patient_toUse = patient_toUse.drop(columns=col_toDrop)
# Split the columns into the label, the categorical inputs (one-hot encoded)
# and the numeric inputs (z-scored), then assemble X and Y.
to_predict = ['ifDead']
x_cat = ['MARITAL', 'RACE', 'ETHNICITY', 'GENDER']
x_cols = [column for column in list(patient_toUse) if column not in to_predict]
x_num = [column for column in x_cols if column not in x_cat]

# dtype=float keeps the indicator columns numeric. pandas >= 2.0 defaults to
# bool, and a frame mixing bool and float columns converts to an object array,
# which the Keras model below cannot take as input.
df_onehot = pd.get_dummies(
    patient_toUse[x_cat],
    prefix={column: column for column in x_cat},
    drop_first=True,
    dtype=float,
)

mu = patient_toUse[x_num].mean(0)
sd = patient_toUse[x_num].std(0)
# Normalize data
# A constant column has sd == 0; dividing by 1 instead maps it to 0 rather
# than to NaN (0 / 0).
df_num = (patient_toUse[x_num] - mu) / sd.replace(0, 1)

Y = patient_toUse[to_predict]
X = df_num.join(df_onehot)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import random

# Hold out 10% of the patients for testing.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=0)

# random forest
# random_state makes the forest reproducible, like the split above.
clf = RandomForestClassifier(n_estimators=100, max_depth=50, random_state=0)
# y_train is a one-column DataFrame; flatten it to the 1-D shape scikit-learn
# expects (this avoids the DataConversionWarning).
clf.fit(x_train, y_train.values.ravel())
y_pred = clf.predict(x_test)
# neural networks
# Fully connected classifier: three ReLU layers, each followed by dropout,
# and a sigmoid output unit.
layer_size = [128, 512, 64]
inp = Input(x_train.shape[1:])
out = inp
for ls in layer_size:
    out = Dense(ls, activation="relu")(out)
    out = Dropout(0.2)(out)
out = Dense(1, activation="sigmoid")(out)
net = Model(inp, out)
net.compile(loss="binary_crossentropy", optimizer=Adam(0.001), metrics=['accuracy'])

# Keep the weights of the epoch with the best validation accuracy on disk.
mcp_save = ModelCheckpoint('../weights.hdf5', save_best_only=True, monitor='val_accuracy', mode='max')
net.fit(x_train, y_train, epochs=250, batch_size=16, verbose=0, validation_split=0.1, callbacks=[mcp_save])

# Evaluate the checkpointed weights, as the forecasting model above does.
# Without this reload the score comes from the weights after the last epoch,
# and the checkpoint is never used.
net.load_weights('../weights.hdf5')
test_loss, test_acc = net.evaluate(x_test, y_test)

# random guess (baseline)
y_guess = random.choices([0, 1], [0.5, 0.5], k=len(y_test))
/tmp/ipykernel_91928/3924046214.py:10: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
2/2 [==============================] - 0s 3ms/step - loss: 3.2411 - accuracy: 0.7143
# Test-set accuracy of the random forest, the neural network and the
# random-guess baseline, printed one per line.
accuracy_report = (
    ("RF Accuracy:", metrics.accuracy_score(y_test, y_pred)),
    ("NN Accuracy:", test_acc),
    ("RG Accuracy:", metrics.accuracy_score(y_test, y_guess)),
)
for caption, accuracy in accuracy_report:
    print(caption, accuracy)
RF Accuracy: 0.7428571428571429 NN Accuracy: 0.7142857313156128 RG Accuracy: 0.4857142857142857
import shap
# Explain the random forest on the training set and plot the feature
# contributions towards class 1 (ifDead == 1).
shap.initjs()
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(x_train, approximate=False, check_additivity=False)

# The return type depends on the SHAP version: older releases return a list
# with one (n_samples, n_features) array per class, newer ones (0.45+ per the
# release notes - confirm against the installed version) return a single
# (n_samples, n_features, n_classes) array. Indexing the latter with [1]
# would select the second sample rather than the second class.
if isinstance(shap_values, list):
    class1_shap_values = shap_values[1]
elif np.ndim(shap_values) == 3:
    class1_shap_values = shap_values[:, :, 1]
else:
    class1_shap_values = shap_values
shap.summary_plot(class1_shap_values, x_train)